package com.lee.crawler.gov.strategy;

import java.sql.Timestamp;
import java.util.logging.Level;
import java.util.logging.Logger;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;

/**
 * Strategy for reading the individual fields of a news article out of a
 * parsed page, plus a default helper that downloads and parses the page.
 */
public interface ArticleStrategy {

	/**
	 * Gets the news title.
	 *
	 * @param content parsed page to read the title from
	 * @return the title as extracted by the implementation
	 */
	String getTitle(Document content);

	/**
	 * Gets the publish time.
	 *
	 * @param content parsed page to read the publish time from
	 * @return the publish time as extracted by the implementation
	 */
	Timestamp getPublishedTime(Document content);

	/**
	 * Gets the news content.
	 *
	 * @param content parsed page to read the article body from
	 * @return the article content as extracted by the implementation
	 */
	String getContent(Document content);

	/**
	 * Gets the content including HTML tags with simple attributes.
	 *
	 * @param content parsed page to read the article body from
	 * @return the article content, with markup, as extracted by the implementation
	 */
	String getHtmlContent(Document content);

	/**
	 * Downloads the page at {@code url} with an HTTP GET and parses it.
	 * <p>
	 * This is a best-effort fetch: any failure (bad URL, network error, HTTP
	 * error, parse error) is logged and reported to the caller as {@code null}
	 * rather than as an exception.
	 *
	 * @param url address of the page to fetch
	 * @return the parsed document, or {@code null} if {@code url} is null or
	 *         blank or the page could not be fetched
	 */
	default Document getContent(String url) {
		// Reject unusable input up front instead of relying on the catch below.
		if (url == null || url.trim().isEmpty()) {
			Logger.getLogger(ArticleStrategy.class.getName())
					.warning("Skipping fetch: URL is null or blank");
			return null;
		}
		try {
			return Jsoup.connect(url)
					.userAgent(
							"Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36")
					.followRedirects(true)
					// jsoup timeouts are in milliseconds, i.e. 80 seconds here.
					.timeout(80000)
					.ignoreContentType(true)
					// NOTE(review): this turns off TLS certificate validation, so the
					// peer's identity is not verified. Kept to preserve which sites can
					// be fetched; confirm it is still required.
					.validateTLSCertificates(false)
					.get();
		} catch (Exception e) {
			// Deliberately broad: callers rely on null, not an exception, on failure.
			Logger.getLogger(ArticleStrategy.class.getName())
					.log(Level.WARNING, "Failed to fetch document from " + url, e);
			return null;
		}
	}

}
